# !diagnostics off

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(plotlyutils)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally) ; library(ggpubr)
library(Rtsne)
library(ClusterR)
library(DESeq2)
library(expss)
library(knitr) ; library(kableExtra)

Load preprocessed dataset (preprocessing code in 01_data_preprocessing.Rmd)

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
genes_info = DE_info %>% data.frame

# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)

# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]

# Update DE_info with SFARI and Neuronal information
genes_info = genes_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
             mutate(`gene-score`=ifelse(is.na(`gene-score`), 'Others', `gene-score`)) %>%
             distinct(ID, .keep_all = TRUE) %>% left_join(GO_neuronal, by='ID') %>%
             mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
             mutate(gene.score=ifelse(`gene-score`=='Others' & Neuronal==1, 'Neuronal', `gene-score`), 
                    significant=padj<0.05 & !is.na(padj)) %>%
             mutate(Group = factor(ifelse(gene.score %in% c('Neuronal','Others'), gene.score, 'SFARI'), 
                    levels = c('SFARI', 'Neuronal', 'Others')))



SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

rm(GO_annotations)

SFARI Gene list


There are 912 genes with a SFARI score, but to map them to the gene expression dataset we had to map the gene names to their corresponding ensembl IDs

Mapping SFARI Gene names to Ensembl IDs


There are 1116 Ensembl IDs corresponding to the 912 genes in the SFARI Gene dataset

  • Since a gene can have more than one ensembl ID, there were some one-to-many mappings between a gene name and ensembl IDs, so that’s why we ended up with 1116 rows in the SFARI_genes dataset.

  • The details about how the genes were annotated with their Ensembl IDs can be found in SecondYear/SFARI/RMarkdowns/get_ensembl_ids_new_SFARI.html


There are 87 genes in the SFARI list without a score, of which 0 don’t have syndromic tag either



Exploratory Analysis


There are 767 SFARI Genes in the expression dataset (~69%)


Of these, only 692 have an assigned score


From now on, we’re only going to focus on these 692 genes with a score

Gene count by SFARI score:

table_info = genes_info %>% apply_labels(`gene-score` = 'SFARI Gene Score', syndromic = 'Syndromic Tag',
                                          Neuronal = 'Neuronal Function', gene.score = 'Gene Score') %>%
             mutate(syndromic = as.logical(syndromic), Neuronal = as.logical(Neuronal))

cro(table_info$`gene-score`)
 #Total 
 SFARI Gene Score 
   1  141
   2  185
   3  366
   Others  12470
   #Total cases  13162


Gene count by Syndromic tag:

cro(table_info$syndromic)
 #Total 
 Syndromic Tag 
   FALSE  651
   TRUE  116
   #Total cases  767


GO Neuronal annotations:


968 genes have neuronal-related annotations

146 of these genes have a SFARI score

cro(table_info$gene.score[genes_info$`gene-score` %in% as.character(c(1:3))],
    list(table_info$Neuronal[genes_info$`gene-score` %in% as.character(c(1:3))], total()))
 Neuronal Function     #Total 
 FALSE   TRUE   
 Gene Score 
   1  103 38   141
   2  144 41   185
   3  299 67   366
   #Total cases  546 146   692
rm(table_info)





All SFARI scores together



Gene Expression


Larger mean expression than the other two groups, smaller SD than Neuronal genes

Note: SD plot was truncated so not all outliers are included

plot_data = data.frame('ID'=rownames(datExpr), 'MeanExpr'=rowMeans(datExpr), 'SDExpr'=apply(datExpr,1,sd)) %>% 
            left_join(genes_info, by='ID') %>%
            mutate(Group = factor(ifelse(gene.score %in% c('Neuronal','Others'), gene.score, 'SFARI'), 
                                  levels = c('SFARI', 'Neuronal', 'Others')))

comparisons = list(c('SFARI','Neuronal'), c('Neuronal','Others'), c('SFARI','Others'))
increase = 1
base = 14.5
pos_y_comparisons = c(1:3*increase + base)

p1 = plot_data %>% ggplot(aes(Group, MeanExpr, fill=Group)) + 
     geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) + 
     stat_compare_means(comparisons = comparisons, label = 'p.signif', method = 't.test', 
                        method.args = list(var.equal = FALSE), label.y = pos_y_comparisons, tip.length = .02) +
     scale_fill_manual(values=c('#00A4F7', SFARI_colour_hue(r=c(8,7)))) + 
     xlab('') + ylab('Mean Expression') +  ggtitle('Mean Expression Comparison') +
     theme_minimal() + theme(legend.position='none')

increase = 0.05
base = 0.75
pos_y_comparisons = c(1:3*increase + base)

p2 = plot_data %>% ggplot(aes(Group, SDExpr, fill=Group)) + 
     geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) + 
     stat_compare_means(comparisons = comparisons, label = 'p.signif', method = 't.test', 
                        method.args = list(var.equal = FALSE), label.y = pos_y_comparisons, tip.length = .01) +
     scale_fill_manual(values=c('#00A4F7', SFARI_colour_hue(r=c(8,7)))) + 
     coord_cartesian(ylim= c(0.05, max(pos_y_comparisons))) +
     xlab('') + ylab('Standard Deviation') +  ggtitle('Standard Deviation Comparison') +
     theme_minimal() + theme(legend.position='none')

grid.arrange(p1, p2, nrow=1)

rm(p1, p2, increase, base, pos_y_comparisons)


Log Fold Change


Proportion of over- and under-expressed genes is very similar between groups: approximately half

genes_info %>% mutate(direction = ifelse(log2FoldChange>0, 'over-expressed', 'under-expressed')) %>% 
               group_by(Group, direction) %>% tally(name = 'over_expressed') %>% 
               filter(direction == 'over-expressed') %>% ungroup %>% 
               left_join(genes_info %>% group_by(Group) %>% tally(name = 'Total'), by = 'Group') %>% ungroup %>%
               mutate('prop_over_expressed' = round(over_expressed/Total,3)) %>% 
               dplyr::select(-direction) %>% kable %>% kable_styling(full_width = F)
Group over_expressed Total prop_over_expressed
SFARI 378 692 0.546
Neuronal 431 822 0.524
Others 6200 11648 0.532

Similar LFC Magnitude than the rest of the genes, perhaps a bit lower than Neuronal genes, but the differences are not statistically significant in any case

increase = 0.04
base = 0.6
pos_y_comparisons = c(1:3*increase + base)

plot_data %>% ggplot(aes(Group, abs(log2FoldChange), fill=Group)) + 
              geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) + 
              stat_compare_means(comparisons = comparisons, label = 'p.signif', method = 't.test', 
                                 method.args = list(var.equal = FALSE), label.y = pos_y_comparisons, 
                                 tip.length = .005) +
     scale_fill_manual(values=c('#00A4F7', SFARI_colour_hue(r=c(8,7)))) + 
     coord_cartesian(ylim= c(0.05, max(pos_y_comparisons))) +
     xlab('') + ylab('LFC Magnitude') +  ggtitle('LFC Magnitude Comparison') +
     theme_minimal() + theme(legend.position='none')

rm(increase, base, pos_y_comparisons)
  • SFARI Genes, as a group, have less genes with high (positive) LFC than the rest of the genes in the dataset
plot_data = genes_info  %>% dplyr::select(Group, log2FoldChange) %>%
            mutate(quant = cut(log2FoldChange, breaks = quantile(log2FoldChange, probs = seq(0,1,0.05)) %>% 
                           as.vector, labels = FALSE),
                   value_range = cut(log2FoldChange, breaks = quantile(log2FoldChange, probs=seq(0,1,0.05)) %>% 
                                 as.vector)) %>% 
            filter(Group == 'SFARI') %>% group_by(quant, value_range) %>% tally %>% ungroup %>%
            left_join(genes_info  %>% dplyr::select(Group, log2FoldChange) %>%
                      mutate(quant = cut(log2FoldChange, breaks = quantile(log2FoldChange,
                                     probs = seq(0,1,0.05)) %>% as.vector, labels = FALSE)) %>% 
                      group_by(quant) %>% tally(name = 'tot') %>% ungroup) %>% mutate(p = 100*n/tot)

ggplotly(plot_data %>% ggplot(aes(quant, p)) + geom_smooth(color = 'gray', alpha = 0.1) + 
         geom_bar(stat = 'identity', fill = '#00A4F7', aes(id = value_range)) + 
         geom_hline(yintercept = 100*mean(genes_info$Group == 'SFARI'), color = 'gray', linetype = 'dotted') +
         xlab('Log Fold Change Quantiles') + ylab('% of SFARI Genes in each Quantile') + ggtitle('
Distribution of SFARI Genes in LFC Quantiles') + theme_minimal())
data.frame('Quantile' = 1:20, 'LFC Range' = cut(genes_info$log2FoldChange,
           breaks = quantile(genes_info$log2FoldChange, probs=seq(0,1,.05)) %>% as.vector) %>% table %>% names) %>%
           kable(caption = 'LFC ranges for each quantile') %>% kable_styling(full_width = F)
LFC ranges for each quantile
Quantile LFC.Range
1 (-1.46,-0.326]
2 (-0.326,-0.244]
3 (-0.244,-0.191]
4 (-0.191,-0.15]
5 (-0.15,-0.117]
6 (-0.117,-0.0906]
7 (-0.0906,-0.0614]
8 (-0.0614,-0.0354]
9 (-0.0354,-0.00888]
10 (-0.00888,0.016]
11 (0.016,0.0414]
12 (0.0414,0.0666]
13 (0.0666,0.0958]
14 (0.0958,0.127]
15 (0.127,0.16]
16 (0.16,0.199]
17 (0.199,0.245]
18 (0.245,0.314]
19 (0.314,0.432]
20 (0.432,2.15]



Differential Expression


Lower proportion of DE genes than genes with Neuronal annotation but slightly higher than the rest of the genes

genes_info %>% group_by(Group, significant) %>% tally(name = 'DE') %>% filter(significant) %>% ungroup %>%
               left_join(genes_info %>% group_by(Group) %>% tally(name = 'Total'), by = 'Group') %>% ungroup %>%
               mutate('prop_DE' = round(DE/Total,2)) %>% dplyr::select(-significant) %>% 
               kable(caption = 'Proportion of DE Genes by Group') %>% kable_styling(full_width = F)
Proportion of DE Genes by Group
Group DE Total prop_DE
SFARI 30 692 0.04
Neuronal 50 822 0.06
Others 380 11648 0.03

SFARI Genes have consistently lower percentage of DE genes than the Genes with Neuronal annotations and a slightly higher level to the rest of the genes

lfc_list = seq(1, 1.2, 0.01)

all_counts = data.frame('group'='All', 'n'=as.character(nrow(genes_info)))
Others_counts = data.frame('group'='Others', n=as.character(sum(genes_info$Group == 'Others')))
Neuronal_counts = data.frame('group'='Neuronal', n=as.character(sum(genes_info$Neuronal)))
lfc_counts_all = genes_info %>% filter(Group == 'SFARI') %>% tally %>%
                 mutate('group'='SFARI', 'n'=as.character(n)) %>%
                 dplyr::select(group, n) %>%
                 bind_rows(Neuronal_counts, Others_counts, all_counts) %>%
                 mutate('lfc'=-1) %>%  dplyr::select(lfc, group, n)

for(lfc in lfc_list){
  
  # Recalculate genes_info with the new threshold (p-values change)
  DE_genes = results(dds, lfcThreshold=log2(lfc), altHypothesis='greaterAbs') %>% data.frame %>% 
             mutate('ID' = rownames(.)) %>% 
             left_join(genes_info %>% dplyr::select(ID, Neuronal, gene.score, Group), by = 'ID') %>% 
             filter(padj<0.05 & abs(log2FoldChange)>log2(lfc))

  
  # Calculate counts by groups
  all_counts = data.frame('group'='All', 'n'=as.character(nrow(DE_genes)))
  Others_counts = data.frame('group'='Others', n=as.character(sum(DE_genes$Group == 'Others')))
  Neuronal_counts = data.frame('group'='Neuronal', n=as.character(sum(DE_genes$Neuronal)))
  lfc_counts = DE_genes %>% filter(Group == 'SFARI') %>% tally %>%
               mutate('group'='SFARI', 'n'=as.character(n)) %>%
               bind_rows(Neuronal_counts, Others_counts, all_counts) %>%
               mutate('lfc'=lfc) %>% dplyr::select(lfc, group, n)
  
  
  # Update lfc_counts_all
  lfc_counts_all = lfc_counts_all %>% bind_rows(lfc_counts)
}

# Add missing entries with 0s
lfc_counts_all = expand.grid('group'=unique(lfc_counts_all$group), 'lfc'=unique(lfc_counts_all$lfc)) %>% 
  left_join(lfc_counts_all, by=c('group','lfc')) %>% replace(is.na(.), 0)

# Calculate percentage of each group remaining
tot_counts = genes_info %>% filter(Group == 'SFARI') %>% tally() %>%
             mutate('group'='SFARI', 'tot'=n) %>% dplyr::select(group, tot) %>%
             bind_rows(data.frame('group'='Neuronal', 'tot'=sum(genes_info$Neuronal)),
                       data.frame('group' = 'Others', 'tot' = sum(genes_info$Group == 'Others')),
                       data.frame('group'='All', 'tot'=nrow(genes_info)))

lfc_counts_all = lfc_counts_all %>% filter(lfc!=-1) %>% #, group!='Others') %>% 
                 left_join(tot_counts, by='group') %>% mutate('perc'=round(100*as.numeric(n)/tot,2))


# Plot change of number of genes
ggplotly(lfc_counts_all %>% filter(group != 'All') %>% 
         mutate(group = factor(group, levels = c('SFARI', 'Neuronal', 'Others'))) %>%
         ggplot(aes(lfc, perc, color=group)) + geom_point(aes(id=n)) + geom_line() +
         scale_color_manual(values=c('#00A4F7', SFARI_colour_hue(r=c(8,7)))) + 
         ylab('% of Differentially Expressed Genes') +  xlab('Fold Change') +
         ggtitle('Effect of filtering thresholds in SFARI Genes') + theme_minimal())







Grouping Genes by SFARI Gene Score


Gene Expression




Normalised data

  • The higher the SFARI score, the higher the mean expression of the gene: This pattern is quite strong and it doesn’t have any biological interpretation, so it’s probably bias in the SFARI score assignment

  • The higher the SFARI score, the lower the standard deviation: This pattern is not as strong, but it is weird because the data was originally heteroscedastic with a positive relation between mean and variance, so the fact that the relation now seems to have reversed could mean that the vst normalisation ended up affecting the highly expressed genes more than it should have when trying to correct their higher variance

plot_data = data.frame('ID'=rownames(datExpr), 'MeanExpr'=rowMeans(datExpr), 'SDExpr'=apply(datExpr,1,sd)) %>% 
            left_join(genes_info, by='ID')

comparisons = list(c('1','2'), c('2','3'), c('3','Neuronal'), c('Neuronal','Others'),
                   c('1','3'), c('3','Others'), c('2','Neuronal'),
                   c('1','Neuronal'), c('2','Others'), c('1','Others'))
increase = 1
base = 15.5
pos_y_comparisons = c(rep(base, 4), rep(base + increase, 2), base + 2:5*increase)
                      
p1 = plot_data %>% ggplot(aes(gene.score, MeanExpr, fill=gene.score)) + 
     geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) +
     stat_compare_means(comparisons = comparisons, label = 'p.signif', method = 't.test', 
                        method.args = list(var.equal = FALSE), label.y = pos_y_comparisons, tip.length = .02) +       
     scale_fill_manual(values=SFARI_colour_hue(r=c(1:3,8,7))) + 
     xlab('SFARI Gene Scores') + ylab('Mean Expression') + 
     theme_minimal() + theme(legend.position='none')

increase = 0.05
base = 0.8
pos_y_comparisons = c(rep(base, 4), rep(base + increase, 2), base + 2:5*increase)
p2 = plot_data %>% ggplot(aes(gene.score, SDExpr, fill=gene.score)) + 
     geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) +
     stat_compare_means(comparisons = comparisons, label = 'p.signif', method = 't.test', 
                        method.args = list(var.equal = FALSE), label.y = pos_y_comparisons, tip.length = .005) +
     scale_fill_manual(values=SFARI_colour_hue(r=c(1:3,8,7))) +
     coord_cartesian(ylim= c(0.05, max(pos_y_comparisons))) +
     xlab('SFARI Gene Scores') + ylab('Standard Deviation') +
     theme_minimal() + theme(legend.position='none')

grid.arrange(p1, p2, nrow=1)

rm(p1, p2, base, increase, pos_y_comparisons)


Raw data

Just to corroborate that the relation between sd and SFARI score used to be in the opposite direction before the normalisation: The higher the SFARI score the higher the mean expression and the higher the standard deviation

*There are a lot of outliers, but the plot is interactive so you can zoom in

# Save preprocessed results
datExpr_prep = datExpr
datMeta_prep = datMeta
genes_info_prep = genes_info

load('./../Data/filtered_raw_data.RData')

plot_data = data.frame('ID'=rownames(datExpr), 'MeanExpr'=rowMeans(datExpr), 'SDExpr'=apply(datExpr,1,sd)) %>% 
            left_join(genes_info, by='ID')

p1 = ggplotly(plot_data %>% ggplot(aes(gene.score, MeanExpr, fill=gene.score)) + 
              geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) + 
              scale_fill_manual(values=SFARI_colour_hue(r=c(1:3,8,7))) + theme_minimal() +
              theme(legend.position='none'))

p2 = ggplotly(plot_data %>% ggplot(aes(gene.score, SDExpr, fill=gene.score)) + 
              geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) + 
              scale_fill_manual(values=SFARI_colour_hue(r=c(1:3,8,7))) + theme_minimal() +
              ggtitle('Mean Expression (left) and SD (right) by SFARI score') + 
              theme(legend.position='none'))

plotly::subplot(p1, p2, nrows=1)
#Return to normalised version of the data
datExpr = datExpr_prep
datMeta = datMeta_prep
genes_info = genes_info_prep

rm(plot_data, p1, p2, datExpr_prep, datMeta_prep, genes_info_prep)




Log Fold Change


Log Fold Change Direction


The proportion of over- and under-expressed genes in each SFARI Gene score is not very different to the proportion in the genes iwth Neuronal annotations nor in the rest of the genes (good, something less to worry about)

aux = genes_info %>% dplyr::select(ID, log2FoldChange, gene.score) %>%
      left_join(data.frame('ID' = rownames(datExpr), 'meanExpr' = rowMeans(datExpr)), by = 'ID') %>%
      dplyr::mutate(direction = ifelse(log2FoldChange>0, 'over-expressed', 'under-expressed'))

plot_data = aux %>% group_by(gene.score, direction) %>% tally(name = 'p') %>%
            left_join(aux %>% group_by(gene.score) %>% tally, by = 'gene.score') %>% mutate(p = p/n, y=1)


plot_data %>% ggplot(aes(gene.score, p, fill=direction)) + geom_bar(stat='identity') + 
              geom_hline(yintercept = mean(plot_data$p[plot_data$direction=='under-expressed']), 
                         linetype = 'dashed', color = 'white') + 
              ylab('Proportion') + xlab('SFARI Gene Scores') + 
              ggtitle('Direction of Fold-Change in genes by SFARI Score') + theme_minimal()

rm(aux)

Log Fold Change Magnitude


The relation between LFC Magnitude and SFARI Gene Scores is not very strong here, it seems like SFARI Genes with a score of 1 have the lowest LFC Magnitude of all scores and genes with a score of 3 the highest, but the difference is only statistically significant when comparing Score 1 vs Score 2

Note: For clarity, the plot was truncated removing some outlier values

increase = 0.04
base = 0.55
pos_y_comparisons = c(rep(base, 4), rep(base + increase, 2), base + 2:5*increase)
                      
genes_info %>% ggplot(aes(gene.score, abs(log2FoldChange), fill=gene.score)) + 
     geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) +
     stat_compare_means(comparisons = comparisons, label = 'p.signif', method = 't.test', 
                        method.args = list(var.equal = FALSE), label.y = pos_y_comparisons, tip.length = .005) +       
     scale_fill_manual(values=SFARI_colour_hue(r=c(1:3,8,7))) + 
     coord_cartesian(ylim = c(0, max(pos_y_comparisons))) +
     xlab('SFARI Gene Scores') + ylab('LFC Magnitude') + 
     theme_minimal() + theme(legend.position='none')

rm(increase, base, pos_y_comparisons)

We know that in general there is a negative relation between mean expression and LFC in genes, and we also know that there is a strong relation between SFARI Gene Scores and the mean level of expression of the genes

This could explain the behaviour we found above, but we want to see if, once you control for the level of expression, the SFARI genes continue to have this relation to LFC or if it dissapears. (Being optimistic, perhaps the SFARI genes actually have higher LFC than genes with similar levels of expression, but we can’t see this in the original plot because of the relation between level of expression and LFC)

plot_data = genes_info %>% dplyr::select(ID, log2FoldChange, gene.score, significant) %>%
            left_join(data.frame('ID' = rownames(datExpr), 'meanExpr' = rowMeans(datExpr)), by = 'ID') %>%
            mutate(alpha = ifelse(gene.score == 'Others' , 0.1, ifelse(gene.score == 'Neuronal', 0.3, 0.7)))

increase = 1
base = 15.5
pos_y_comparisons = c(rep(base, 4), rep(base + increase, 2), base + 2:5*increase)
                      
p1 = plot_data %>% ggplot(aes(gene.score, meanExpr, fill=gene.score)) + 
     geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) +
     stat_compare_means(comparisons = comparisons, label = 'p.signif', method = 't.test', 
                        method.args = list(var.equal = FALSE), label.y = pos_y_comparisons, tip.length = .02) +       
     scale_fill_manual(values=SFARI_colour_hue(r=c(1:3,8,7))) + 
     xlab('SFARI Gene Scores') + ylab('Mean Expression') + 
     theme_minimal() + theme(legend.position='none')

p2 = plot_data %>% ggplot(aes(meanExpr, abs(log2FoldChange), color = gene.score)) + 
     geom_point(alpha=plot_data$alpha) + geom_smooth(method='lm', color='#999999') + 
     ylab('LogFoldChange Magnitude') + xlab('Mean Expression') + 
     scale_color_manual(values=SFARI_colour_hue(r=c(1:3,8,7))) + 
     theme_minimal() + theme(legend.position = 'none')

p2 = ggExtra::ggMarginal(p2, type='density',groupColour = TRUE, size=10)

grid.arrange(p2, p1, ncol=2, widths = c(0.6, 0.4))

rm(p1,p2)

This plot shows that our above hypothesis was correct but perhaps doesn’t play a role as relevant as we had thought: In general, the higher the level of expression, the lower the LFC Magnitude, but this pattern flattens for the genes with the highest levels of expression of the whole dataset, which is the region where the SFARI Genes are

plot_data = data.frame('meanExpr' = rowMeans(datExpr), 'LFC_magnitude' = abs(genes_info$log2FoldChange), 
                       'gene.score' = genes_info$gene.score, 'p' = NA) %>% arrange(meanExpr)

w = 1000
for(i in 1:(nrow(plot_data)-w)){
  plot_data$p[i+floor(w/2)] = mean(plot_data$LFC_magnitude[i:(i+w)])
}

aux_data = plot_data %>% filter(!gene.score %in% c('Neuronal','Others')) %>% group_by(gene.score) %>%
           dplyr::summarise(mean_by_score = mean(meanExpr)) %>% ungroup %>% 
           mutate('color' = SFARI_colour_hue(r=1:6)[1:3])

ggplotly(plot_data %>% filter(!is.na(p)) %>% ggplot(aes(meanExpr, p)) + geom_line() +
         xlab('Mean Level of Expression') + ylab('Sliding Average of LFC Magnitude') +
         geom_vline(data = aux_data, aes(xintercept = mean_by_score), color = aux_data$color) + 
         ggtitle('Sliding Average of LFC Magnitude by Mean Level of Expression') + theme_minimal())
rm(aux_data)

Fold-Change Magnitude controlling by level of expression


We want to know what happens to the originally negative relation we found between SFARI Gene scores and lFC magnitude when we control for level of expression.

To do this, I’m going to compare each SFARI Gene with its closest non-SFARI neighbours following these steps:

  1. Select one SFARI gene

  2. Select its neighbours: 100 non-SFARI genes with the most similar mean level of Expression

  3. Standardise the lFC magnitude of each of the neighbours and of the SFARI gene (using the mean and sd of the lFC magnitude of only these 101 genes)

  4. Repeat this for each of the SFARI Genes, saving the standardised lFC magnitudes of all the SFARI genes and all the neighbours

  5. Compare the distribution of this value between these two groups (SFARI and their neighbours)


This plot shows the general idea of steps 1, 2, and 3, selecting a random SFARI gene:

  • The plot on the left shows the original mean expression and lFC magnitude of the SFARI Gene and its 100 closest neighbours

  • The plot on the right shows the standardised lFC mangitude of the genes, and the vertical lines represent the value that is going to be recorded for each of this genes to be compared afterwards

n = 100

plot_data = genes_info %>% dplyr::select(ID, log2FoldChange, gene.score) %>%
            left_join(data.frame('ID' = rownames(datExpr), 'meanExpr' = rowMeans(datExpr)), by = 'ID')

SFARI_gene = plot_data %>% filter(gene.score %in% c('1','2','3','4','5','6')) %>% sample_n(1) %>% 
             mutate(d=0, alpha = 1)
nn = plot_data %>% filter(gene.score %in% c('Neuronal','Others')) %>% 
     mutate(d = abs(meanExpr-SFARI_gene$meanExpr), alpha=0.5) %>% top_n(n=-n, wt = d)

plot_data = rbind(SFARI_gene, nn) %>% 
            mutate(std_magnitude = (abs(log2FoldChange) - mean(abs(log2FoldChange)))/sd(abs(log2FoldChange)))

p1 = plot_data %>% ggplot(aes(meanExpr, abs(log2FoldChange), color = gene.score)) + 
     geom_point(alpha = plot_data$alpha) + xlab('Mean Expression') + ylab('Log2 Fold Change Magnitude') + 
     scale_color_manual(values=SFARI_colour_hue(r=c(as.numeric(SFARI_gene$gene.score),8,7))) + 
     theme_minimal() + theme(legend.position='none')

p2 = plot_data %>% ggplot(aes(meanExpr, std_magnitude, color = gene.score)) + 
     geom_point(alpha = plot_data$alpha) + 
     geom_hline(aes(yintercept = mean(std_magnitude)), linetype = 'dashed', color = '#999999') + 
     scale_color_manual(values=SFARI_colour_hue(r=c(as.numeric(SFARI_gene$gene.score),8,7))) + 
     geom_segment(aes(x=SFARI_gene$meanExpr, y=mean(std_magnitude), xend = SFARI_gene$meanExpr, 
                  yend = std_magnitude[1]), alpha = 0.5, 
                  color = SFARI_colour_hue(r=1:8)[as.numeric(SFARI_gene$gene.score)]) +
     xlab('Mean Expression') + ylab('Standardised LFC Magnitude') + 
     theme_minimal() + theme(legend.position='none')
for(i in 1:15){
random_sample = plot_data %>% filter(gene.score != SFARI_gene$gene.score) %>% sample_n(1)
p2 = p2 + geom_segment(x=random_sample$meanExpr, xend = random_sample$meanExpr, y=mean(plot_data$std_magnitude), 
                       yend = random_sample$std_magnitude, alpha = 0.5, color = 'gray')  
}

grid.arrange(p1, p2, ncol=2, top='Comparing SFARI Genes with their n closest neighbours by Mean Expression')

cat(paste0('SFARI gene\'s standardised distance to its neigbours\'s LFC magnitude: ',
           round(plot_data$std_magnitude[1],4)))
## SFARI gene's standardised distance to its neigbours's LFC magnitude: 0.4089
rm(p1, p2, SFARI_gene, nn, random_sample, i)

As steps 4, and 5, say, we repeat this for all of the SFARI Genes, recording their standardised mangnitude as well as the ones from their neighbours so we can study them all together


Results


Even when controlling for the relation between Mean Expression and LFC by comparing each SFARI Gene only with neighbouring genes, we see the same results as before

  • Neuronal genes have consistently higher magnitudes of LFC than non-SFARI, non-neuronal genes (makes sense)

  • SFARI Genes have similar LFC Magnitude than genes without Neuronal annotations

get_std_lfc_magnitudes = function(data_info, SFARI_score, n){
  
  SFARI_genes = data_info %>% filter(gene.score == as.character(SFARI_score))
  
  std_magnitudes = data.frame(gene.score = as.character(), std_magnitude = as.numeric)
  
  for(i in 1:nrow(SFARI_genes)){
    SFARI_gene = SFARI_genes[i,]
    nn = data_info %>% filter(gene.score %in% c('Neuronal','Others')) %>%
         mutate(d = abs(meanExpr-SFARI_gene$meanExpr)) %>% top_n(n=-n, wt = d) %>% dplyr::select(-d)
    iter_data = rbind(SFARI_gene, nn) %>% 
          mutate(std_magnitude = (abs(log2FoldChange) - mean(abs(log2FoldChange)))/sd(abs(log2FoldChange))) %>%
          dplyr::select(gene.score, std_magnitude)
    std_magnitudes = rbind(std_magnitudes, iter_data)
  }
  
  return(std_magnitudes)
}

create_plot_by_SFARI_score = function(score, n) {
  
  std_magnitudes = get_std_lfc_magnitudes(data_info, score, n)
  
  plot = std_magnitudes %>% ggplot(aes(gene.score, std_magnitude)) + 
         geom_boxplot(aes(fill = gene.score), outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) + 
         xlab('') + ylab('Standardised LFC Magnitude') + 
         scale_fill_manual(values=SFARI_colour_hue(r=c(score,8,7))) + 
         coord_cartesian(ylim = c(min(std_magnitudes$std_magnitude), 3)) +
         stat_compare_means(method = 't.test', method.args = list(var.equal = FALSE), label = 'p.signif',
                        ref.group = as.character(score), label.y = 3) +
         theme_minimal() + theme(legend.position = 'none')
  
  return(plot)
}

data_info = genes_info %>% dplyr::select(ID, log2FoldChange, gene.score) %>% 
            left_join(data.frame('ID' = rownames(datExpr), 'meanExpr' = rowMeans(datExpr)), by = 'ID')

p1 = create_plot_by_SFARI_score(1, n)
p2 = create_plot_by_SFARI_score(2, n)
p3 = create_plot_by_SFARI_score(3, n)


grid.arrange(p1, p2, p3, nrow=1,
             top = 'Comparison of LFC Magnitude of SFARI gens and their closest neighbours by Mean Expression')

rm(p1, p2, p3)



Differential Expression


The proportion of DE genes for each SFARI Genes is much larger in SFARI Genes Score 1 and much lower in Score 2 than in the rest

Given the small number of differentially expressed genes in this dataset, this values are not that reliable

plot_info = genes_info %>% group_by(gene.score, significant) %>% tally(name = 'DE') %>% ungroup %>% ungroup %>%
            left_join(genes_info %>% group_by(gene.score) %>% tally(name = 'total') %>% 
                        ungroup, by = 'gene.score') %>% filter(significant) %>% 
            mutate('perc' = 100*DE/total)

ggplotly(plot_info %>% ggplot(aes(gene.score, perc, fill = gene.score)) + geom_bar(stat='identity') + 
         xlab('SFARI Gene Score') + ylab('% of DE genes') +  theme_minimal() + theme(legend.position = 'none') +
         scale_fill_manual(values=SFARI_colour_hue(r=c(1:3,8,7))))
table_info = genes_info %>% apply_labels(gene.score = 'SFARI Gene Score', 
                                         significant = 'Differentially Expressed')

cro(table_info$gene.score, list(table_info$significant, total()))
 Differentially Expressed     #Total 
 FALSE   TRUE   
 SFARI Gene Score 
   1  131 10   141
   2  183 2   185
   3  348 18   366
   Neuronal  772 50   822
   Others  11268 380   11648
   #Total cases  12702 460   13162
rm(table_info)

In our dataset, the higher the level of expression of a gene, the more likely the gene is to be DE (this can be seen by ordering the genes by level of expression and calculating the proportion of DE Genes using a sliding window). Based one this, the SFARI Scores 1 should have the highest proportion of DE Genes, followed by Scores 2 and 3 with a similar proportion, Score 2 slightly higher (instead, we see that SFARI Score 2 has less DE genes than SFARI Score 3)

plot_data = data.frame('meanExpr' = rowMeans(datExpr), 'DE' = genes_info$significant, 
                       'gene.score' = genes_info$gene.score, 'p' = NA) %>% arrange(meanExpr)

w = 3000
for(i in 1:(nrow(plot_data)-w)){
  plot_data$p[i+floor(w/2)] = mean(plot_data$DE[i:(i+w)])*100
}


aux_data = plot_data %>% filter(!gene.score %in% c('Neuronal','Others')) %>% group_by(gene.score) %>%
           dplyr::summarise(mean_by_score = mean(meanExpr)) %>% ungroup %>% 
           mutate('color' = SFARI_colour_hue(r=1:6)[1:3])

ggplotly(plot_data %>% filter(!is.na(p)) %>% ggplot(aes(meanExpr, p)) + geom_line() +
         xlab('Mean Level of Expression') + ylab('Sliding Percentage of DE Genes') +
         geom_vline(data = aux_data, aes(xintercept = mean_by_score), color = aux_data$color) + 
         ggtitle('Sliding Percentage of DE Genes by Mean Level of Expression') + theme_minimal())  
rm(aux_data, w, i)


Differential Expression controlling by level of expression


We want to see how the different scores in the SFARI Genes compare to other groups of genes with similar levels of expression when studying the proportion of DE genes

To do this, I’m going to compare each SFARI Gene with its closest non-SFARI neighbours following these steps:

  1. Select one SFARI gene

  2. Select its neighbours: 100 non-SFARI genes with the most similar mean level of Expression

  3. Calculate the % of these neighbours that are DE and store this value

  4. Repeat this for all of the genes in a specific SFARI score: We have a distribution of % DE neighbours and a single value indicating the percentage of DE genes in that SFARI score

4.1 Measure how annomalous the value for the SFARI scores is by calculating its distance to the distribution (in standard devitions)

  1. Repeat this for the other SFARI Gene scores


Notes:

SFARI Genes Scores 1 and 3 have a higher percentage of DE genes than their neighbours and Score 2 lower

As before, this result is not very reliable given the small number of DE genes in each SFARI Score in this dataset

get_neighbours_DE = function(data_info, SFARI_score, n){
  
  SFARI_genes = data_info %>% filter(gene.score == as.character(SFARI_score))
  
  perc_DE = data.frame(gene.score = as.character(), p_DE = as.numeric)
  
  for(i in 1:nrow(SFARI_genes)){
    SFARI_gene = SFARI_genes[i,]
    nn = data_info %>% filter(gene.score %in% c('Neuronal','Others')) %>% 
         mutate(d = abs(meanExpr-SFARI_gene$meanExpr)) %>% top_n(n = -n, wt = d) %>%
         group_by(gene.score) %>% summarise(perc_DE = 100*mean(significant)) %>% ungroup
    perc_DE = rbind(perc_DE, nn)
  }
  
  colnames(perc_DE) = c('gene.score', 'perc_DE')
  return(perc_DE)
}

calc_dist = function(SFARI_score, df, group){
  SFARI_p = 100*mean(genes_info$significant[genes_info$gene.score==SFARI_score])
  mean_nn = df$perc_DE[df$gene.score == group] %>% mean
  sd_nn = df$perc_DE[df$gene.score == group] %>% sd
  dist = round(abs(SFARI_p-mean_nn)/sd_nn, 2)
  
  return(dist)
}

create_plot_by_SFARI_score = function(score, n) {
  
  perc_DE_nn = get_neighbours_DE(data_info, score, n)
  
  plot = perc_DE_nn %>% ggplot(aes(gene.score, perc_DE, fill = gene.score)) + geom_boxplot() + 
         xlab('') + ylab('% of DE Genes') + 
         geom_hline(yintercept = 100*mean(genes_info$significant[genes_info$gene.score==as.character(score)]), 
                    color = SFARI_colour_hue(r=1:6)[score]) +
         ggtitle(paste0('Neighbours of SFARI Score ', score,' Genes',
                        '\n\n   Dist to Neuronal: ',calc_dist(as.character(score),perc_DE_nn,'Neuronal'),' SD',
                        '\n   Dist to Others: ', calc_dist(as.character(score), perc_DE_nn, 'Others'),' SD')) +
         scale_fill_manual(values=SFARI_colour_hue(r=c(8,7))) + theme_minimal() + theme(legend.position='none')
  
  return(plot)
}

data_info = genes_info %>% dplyr::select(ID, significant, gene.score) %>% 
            left_join(data.frame('ID' = rownames(datExpr), 'meanExpr' = rowMeans(datExpr)), by = 'ID')

n = 100
p1 = create_plot_by_SFARI_score(1, n)
p2 = create_plot_by_SFARI_score(2, n)
p3 = create_plot_by_SFARI_score(3, n)


grid.arrange(p1, p2, p3, nrow = 1)

rm(p1, p2, p3, get_neighbours_DE, calc_dist, create_plot_by_SFARI_score, n)



Effects of modifying filtering threshold by SFARI score


  • SFARI Genes Score 1 has the highest proportion of DE Genes at the beginning, but when we increase the LFC threshold, it quickly decreases.

, Score 2 has the lowest proportion of SFARI Genes, having only 2 that quickly disappear

  • Using the null hypothesis \(H_0: lfc=0\), 30/692 SFARI genes are statistically significant (~4%)

  • This results aren’t very robust given the small number of Differentially Expressed enes in each SFARI group

fc_list = seq(1, 1.1, 0.003)

all_counts = data.frame('group'='All', 'n'=as.character(nrow(genes_info)))
Others_counts = data.frame('group'='Others', n=as.character(sum(genes_info$gene.score == 'Others')))
Neuronal_counts = data.frame('group'='Neuronal', n=as.character(sum(genes_info$Neuronal)))
lfc_counts_all = genes_info %>% group_by(`gene-score`) %>% filter(`gene-score` != 'Others') %>% tally %>%
                 mutate('group'=as.factor(`gene-score`), 'n'=as.character(n)) %>%
                 dplyr::select(group, n) %>%
                 bind_rows(Neuronal_counts, Others_counts, all_counts) %>%
                 mutate('lfc'=-1) %>%  dplyr::select(lfc, group, n)

for(lfc in lfc_list){
  
  # Recalculate genes_info with the new threshold (p-values change)
  DE_genes = results(dds, lfcThreshold=log2(lfc), altHypothesis='greaterAbs') %>% data.frame
  
  DE_genes = DE_genes %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
             mutate(`gene-score`=ifelse(is.na(`gene-score`), 'Others', `gene-score`)) %>%
             distinct(ID, .keep_all = TRUE) %>% left_join(GO_neuronal, by='ID') %>%
             mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
             mutate(gene.score=ifelse(`gene-score`=='Others' & Neuronal==1, 'Neuronal', `gene-score`))
  
  DE_genes = DE_genes %>% filter(padj<0.05 & abs(log2FoldChange)>log2(lfc))

  
  # Calculate counts by groups
  all_counts = data.frame('group'='All', 'n'=as.character(nrow(DE_genes)))
  Others_counts = data.frame('group'='Others', n=as.character(sum(DE_genes$`gene-score` == 'Others')))
  Neuronal_counts = data.frame('group'='Neuronal', n=as.character(sum(DE_genes$Neuronal)))
  lfc_counts = DE_genes %>% group_by(`gene-score`) %>% tally %>%
               mutate('group'=`gene-score`, 'n'=as.character(n)) %>%
               bind_rows(Neuronal_counts, all_counts) %>%
               mutate('lfc'=lfc) %>% dplyr::select(lfc, group, n)
  
  
  # Update lfc_counts_all
  lfc_counts_all = lfc_counts_all %>% bind_rows(lfc_counts)
}

# Add missing entries with 0s
lfc_counts_all = expand.grid('group'=unique(lfc_counts_all$group), 'lfc'=unique(lfc_counts_all$lfc)) %>% 
  left_join(lfc_counts_all, by=c('group','lfc')) %>% replace(is.na(.), 0)

# Calculate percentage of each group remaining
tot_counts = genes_info %>% group_by(`gene-score`) %>% tally() %>% filter(`gene-score`!='Others') %>%
             mutate('group'=`gene-score`, 'tot'=n) %>% dplyr::select(group, tot) %>%
             bind_rows(data.frame('group'='Neuronal', 'tot'=sum(genes_info$Neuronal)),
                       data.frame('group'='Others', 'tot'=sum(genes_info$gene.score=='Others'&!genes_info$Neuronal)),
                       data.frame('group'='All', 'tot'=nrow(genes_info)))

lfc_counts_all = lfc_counts_all %>% filter(lfc!=-1) %>% #, group!='Others') %>% 
                 left_join(tot_counts, by='group') %>% mutate('perc'=round(100*as.numeric(n)/tot,2))


# Plot change of number of genes
ggplotly(lfc_counts_all %>% filter(group != 'All') %>% ggplot(aes(lfc, perc, color=group)) + 
         geom_point(aes(id=n)) + geom_line() + scale_color_manual(values=SFARI_colour_hue(r=c(1:3,8,7))) + 
         ylab('% of Differentially Expressed Genes') +  xlab('Fold Change') + 
         ggtitle('Effect of filtering thresholds by SFARI score') + theme_minimal())
rm(fc_list, all_counts, Neuronal_counts, lfc_counts_all, lfc, lfc_counts, lfc_counts_all, tot_counts,
   lfc_counts_all, Others_counts)


Conclusion



The patterns found in Gandal are much less clear in this dataset, many times not being statistically significant any more

  • In the case of differentially expressed genes, the reason is probably the relatively low number of differentially expressed genes found in this dataset when compared to Gandal, which makes all the results related to this metric unreliable

  • In the case of the Log Fold Change, we do see the same patterns, but they are not as significant as before, this is probably because the ASD signal is weaker in this dataset than in Gandal (as we can see in the Sample’s PCA visualization), making these measurements noisier

  • A final aspect that could be damaging the results in this dataset, when compared to Gandal, is that the genes have a much larger variance in level of expression between samples (around 1.5x larger, as it can be seen in the plot below), which can make the noise and biological signal more difficult to distinguish (which could be the cause behind the low number of DE genes)

Gupta_datExpr = datExpr
load('./../../../Gandal/AllRegions/Data/preprocessed_data.RData')
Gandal_datExpr = datExpr

plot_data = data.frame('ID' = rownames(Gandal_datExpr), 'Gandal_SD' = rowSdDiffs(Gandal_datExpr)) %>%
            inner_join(data.frame('ID' = rownames(Gupta_datExpr), 'Gupta_SD' = rowSdDiffs(Gupta_datExpr)), 
                       by = 'ID') %>%
            left_join(DE_info %>% data.frame %>% mutate(ID = rownames(.)), by = 'ID') %>%
            mutate(diff = Gandal_SD-Gupta_SD, abs_diff = abs(Gandal_SD-Gupta_SD)) %>%
                   mutate(std_diff = (diff-mean(diff))/sd(diff), distance = abs((diff-mean(diff))/sd(diff)))

plot_data %>% ggplot(aes(Gandal_SD, Gupta_SD)) + geom_point(alpha=0.1, color = '#cc0066') + 
              geom_abline(slope = 1, intercept = 0, color = 'gray', linetype = 'dashed') + 
              geom_smooth(alpha = 0.1, color = 'gray') + xlab('Gandal') + ylab('Gupta') + 
              coord_fixed() + scale_x_continuous(limits = c(0, max(plot_data$Gupta_SD))) + 
              ggtitle('SD Comparison between Datasets') + theme_minimal() 

rm(datExpr, datMeta, datGenes, dds, DE_info, Gupta_datExpr, Gandal_datExpr, plot_data)

Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.1.0            knitr_1.28                 
##  [3] expss_0.10.2                DESeq2_1.24.0              
##  [5] SummarizedExperiment_1.14.1 DelayedArray_0.10.0        
##  [7] BiocParallel_1.18.1         matrixStats_0.56.0         
##  [9] Biobase_2.44.0              GenomicRanges_1.36.1       
## [11] GenomeInfoDb_1.20.0         IRanges_2.18.3             
## [13] S4Vectors_0.22.1            BiocGenerics_0.30.0        
## [15] ClusterR_1.2.1              gtools_3.8.2               
## [17] Rtsne_0.15                  ggpubr_0.2.5               
## [19] magrittr_1.5                GGally_1.5.0               
## [21] gridExtra_2.3               viridis_0.5.1              
## [23] viridisLite_0.3.0           RColorBrewer_1.1-2         
## [25] plotlyutils_0.0.0.9000      plotly_4.9.2               
## [27] glue_1.4.1                  reshape2_1.4.4             
## [29] forcats_0.5.0               stringr_1.4.0              
## [31] dplyr_1.0.0                 purrr_0.3.4                
## [33] readr_1.3.1                 tidyr_1.1.0                
## [35] tibble_3.0.1                ggplot2_3.3.2              
## [37] tidyverse_1.3.0            
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1           backports_1.1.8        Hmisc_4.4-0           
##   [4] plyr_1.8.6             lazyeval_0.2.2         splines_3.6.3         
##   [7] gmp_0.5-13.6           crosstalk_1.1.0.1      digest_0.6.25         
##  [10] htmltools_0.4.0        fansi_0.4.1            checkmate_2.0.0       
##  [13] memoise_1.1.0          cluster_2.1.0          annotate_1.62.0       
##  [16] modelr_0.1.6           jpeg_0.1-8.1           colorspace_1.4-1      
##  [19] blob_1.2.1             rvest_0.3.5            haven_2.2.0           
##  [22] xfun_0.12              crayon_1.3.4           RCurl_1.98-1.2        
##  [25] jsonlite_1.7.0         genefilter_1.66.0      survival_3.1-12       
##  [28] gtable_0.3.0           zlibbioc_1.30.0        XVector_0.24.0        
##  [31] webshot_0.5.2          scales_1.1.1           DBI_1.1.0             
##  [34] miniUI_0.1.1.1         Rcpp_1.0.4.6           xtable_1.8-4          
##  [37] htmlTable_1.13.3       foreign_0.8-76         bit_1.1-15.2          
##  [40] Formula_1.2-3          htmlwidgets_1.5.1      httr_1.4.1            
##  [43] acepack_1.4.1          ellipsis_0.3.1         pkgconfig_2.0.3       
##  [46] reshape_0.8.8          XML_3.99-0.3           farver_2.0.3          
##  [49] nnet_7.3-14            dbplyr_1.4.2           locfit_1.5-9.4        
##  [52] later_1.0.0            tidyselect_1.1.0       labeling_0.3          
##  [55] rlang_0.4.6            AnnotationDbi_1.46.1   munsell_0.5.0         
##  [58] cellranger_1.1.0       tools_3.6.3            cli_2.0.2             
##  [61] generics_0.0.2         RSQLite_2.2.0          broom_0.5.5           
##  [64] fastmap_1.0.1          evaluate_0.14          yaml_2.2.1            
##  [67] bit64_0.9-7            fs_1.4.0               nlme_3.1-147          
##  [70] mime_0.9               ggExtra_0.9            xml2_1.2.5            
##  [73] compiler_3.6.3         rstudioapi_0.11        png_0.1-7             
##  [76] ggsignif_0.6.0         reprex_0.3.0           geneplotter_1.62.0    
##  [79] stringi_1.4.6          highr_0.8              lattice_0.20-41       
##  [82] Matrix_1.2-18          vctrs_0.3.1            pillar_1.4.4          
##  [85] lifecycle_0.2.0        data.table_1.12.8      bitops_1.0-6          
##  [88] httpuv_1.5.2           R6_2.4.1               latticeExtra_0.6-29   
##  [91] promises_1.1.0         assertthat_0.2.1       withr_2.2.0           
##  [94] GenomeInfoDbData_1.2.1 mgcv_1.8-31            hms_0.5.3             
##  [97] grid_3.6.3             rpart_4.1-15           rmarkdown_2.1         
## [100] shiny_1.4.0.2          lubridate_1.7.4        base64enc_0.1-3